{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Merging Dataframes\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Cost</th>\n",
       "      <th>Item Purchased</th>\n",
       "      <th>Name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Store 1</th>\n",
       "      <td>22.5</td>\n",
       "      <td>Sponge</td>\n",
       "      <td>Chris</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Store 1</th>\n",
       "      <td>2.5</td>\n",
       "      <td>Kitty Litter</td>\n",
       "      <td>Kevyn</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Store 2</th>\n",
       "      <td>5.0</td>\n",
       "      <td>Spoon</td>\n",
       "      <td>Filip</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Cost Item Purchased   Name\n",
       "Store 1  22.5         Sponge  Chris\n",
       "Store 1   2.5   Kitty Litter  Kevyn\n",
       "Store 2   5.0          Spoon  Filip"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame([{'Name': 'Chris', 'Item Purchased': 'Sponge', 'Cost': 22.50},\n",
    "                   {'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50},\n",
    "                   {'Name': 'Filip', 'Item Purchased': 'Spoon', 'Cost': 5.00}],\n",
    "                  index=['Store 1', 'Store 1', 'Store 2'])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Cost</th>\n",
       "      <th>Item Purchased</th>\n",
       "      <th>Name</th>\n",
       "      <th>Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Store 1</th>\n",
       "      <td>22.5</td>\n",
       "      <td>Sponge</td>\n",
       "      <td>Chris</td>\n",
       "      <td>December 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Store 1</th>\n",
       "      <td>2.5</td>\n",
       "      <td>Kitty Litter</td>\n",
       "      <td>Kevyn</td>\n",
       "      <td>January 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Store 2</th>\n",
       "      <td>5.0</td>\n",
       "      <td>Spoon</td>\n",
       "      <td>Filip</td>\n",
       "      <td>mid-May</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Cost Item Purchased   Name        Date\n",
       "Store 1  22.5         Sponge  Chris  December 1\n",
       "Store 1   2.5   Kitty Litter  Kevyn   January 1\n",
       "Store 2   5.0          Spoon  Filip     mid-May"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Date'] = ['December 1', 'January 1', 'mid-May']\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['Delivered'] = True\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['Feedback'] = ['Positive', None, 'Negative']\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "adf = df.reset_index()\n",
    "adf['Date'] = pd.Series({0: 'December 1', 2: 'mid-May'})\n",
    "adf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                 Role\n",
      "Name                 \n",
      "Kelly  Director of HR\n",
      "Sally  Course liasion\n",
      "James          Grader\n",
      "\n",
      "            School\n",
      "Name              \n",
      "James     Business\n",
      "Mike           Law\n",
      "Sally  Engineering\n"
     ]
    }
   ],
   "source": [
    "staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR'},\n",
    "                         {'Name': 'Sally', 'Role': 'Course liasion'},\n",
    "                         {'Name': 'James', 'Role': 'Grader'}])\n",
    "staff_df = staff_df.set_index('Name')\n",
    "student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business'},\n",
    "                           {'Name': 'Mike', 'School': 'Law'},\n",
    "                           {'Name': 'Sally', 'School': 'Engineering'}])\n",
    "student_df = student_df.set_index('Name')\n",
    "print(staff_df.head())\n",
    "print()\n",
    "print(student_df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Role</th>\n",
       "      <th>School</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>James</th>\n",
       "      <td>Grader</td>\n",
       "      <td>Business</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kelly</th>\n",
       "      <td>Director of HR</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mike</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Law</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sally</th>\n",
       "      <td>Course liasion</td>\n",
       "      <td>Engineering</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Role       School\n",
       "Name                              \n",
       "James          Grader     Business\n",
       "Kelly  Director of HR          NaN\n",
       "Mike              NaN          Law\n",
       "Sally  Course liasion  Engineering"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(staff_df, student_df, how='outer', left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pd.merge(staff_df, student_df, how='inner', left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Role</th>\n",
       "      <th>School</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Kelly</th>\n",
       "      <td>Director of HR</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sally</th>\n",
       "      <td>Course liasion</td>\n",
       "      <td>Engineering</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>James</th>\n",
       "      <td>Grader</td>\n",
       "      <td>Business</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Role       School\n",
       "Name                              \n",
       "Kelly  Director of HR          NaN\n",
       "Sally  Course liasion  Engineering\n",
       "James          Grader     Business"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(staff_df, student_df, how='left', left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pd.merge(staff_df, student_df, how='right', left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Role</th>\n",
       "      <th>School</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Kelly</td>\n",
       "      <td>Director of HR</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sally</td>\n",
       "      <td>Course liasion</td>\n",
       "      <td>Engineering</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>James</td>\n",
       "      <td>Grader</td>\n",
       "      <td>Business</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Name            Role       School\n",
       "0  Kelly  Director of HR          NaN\n",
       "1  Sally  Course liasion  Engineering\n",
       "2  James          Grader     Business"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "staff_df = staff_df.reset_index()\n",
    "student_df = student_df.reset_index()\n",
    "pd.merge(staff_df, student_df, how='left', left_on='Name', right_on='Name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR', 'Location': 'State Street'},\n",
    "                         {'Name': 'Sally', 'Role': 'Course liasion', 'Location': 'Washington Avenue'},\n",
    "                         {'Name': 'James', 'Role': 'Grader', 'Location': 'Washington Avenue'}])\n",
    "student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business', 'Location': '1024 Billiard Avenue'},\n",
    "                           {'Name': 'Mike', 'School': 'Law', 'Location': 'Fraternity House #22'},\n",
    "                           {'Name': 'Sally', 'School': 'Engineering', 'Location': '512 Wilson Crescent'}])\n",
    "pd.merge(staff_df, student_df, how='left', left_on='Name', right_on='Name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "staff_df = pd.DataFrame([{'First Name': 'Kelly', 'Last Name': 'Desjardins', 'Role': 'Director of HR'},\n",
    "                         {'First Name': 'Sally', 'Last Name': 'Brooks', 'Role': 'Course liasion'},\n",
    "                         {'First Name': 'James', 'Last Name': 'Wilde', 'Role': 'Grader'}])\n",
    "student_df = pd.DataFrame([{'First Name': 'James', 'Last Name': 'Hammond', 'School': 'Business'},\n",
    "                           {'First Name': 'Mike', 'Last Name': 'Smith', 'School': 'Law'},\n",
    "                           {'First Name': 'Sally', 'Last Name': 'Brooks', 'School': 'Engineering'}])\n",
    "staff_df\n",
    "student_df\n",
    "pd.merge(staff_df, student_df, how='inner', left_on=['First Name','Last Name'], right_on=['First Name','Last Name'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Idiomatic Pandas: Making Code Pandorable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv('census.csv')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(df.where(df['SUMLEV']==50)\n",
    "    .dropna()\n",
    "    .set_index(['STNAME','CTYNAME'])\n",
    "    .rename(columns={'ESTIMATESBASE2010': 'Estimates Base 2010'}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = df[df['SUMLEV']==50]\n",
    "df.set_index(['STNAME','CTYNAME'], inplace=True)\n",
    "df.rename(columns={'ESTIMATESBASE2010': 'Estimates Base 2010'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def min_max(row):\n",
    "    data = row[['POPESTIMATE2010',\n",
    "                'POPESTIMATE2011',\n",
    "                'POPESTIMATE2012',\n",
    "                'POPESTIMATE2013',\n",
    "                'POPESTIMATE2014',\n",
    "                'POPESTIMATE2015']]\n",
    "    return pd.Series({'min': np.min(data), 'max': np.max(data)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.apply(min_max, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def min_max(row):\n",
    "    data = row[['POPESTIMATE2010',\n",
    "                'POPESTIMATE2011',\n",
    "                'POPESTIMATE2012',\n",
    "                'POPESTIMATE2013',\n",
    "                'POPESTIMATE2014',\n",
    "                'POPESTIMATE2015']]\n",
    "    row['max'] = np.max(data)\n",
    "    row['min'] = np.min(data)\n",
    "    return row\n",
    "df.apply(min_max, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "rows = ['POPESTIMATE2010',\n",
    "        'POPESTIMATE2011',\n",
    "        'POPESTIMATE2012',\n",
    "        'POPESTIMATE2013',\n",
    "        'POPESTIMATE2014',\n",
    "        'POPESTIMATE2015']\n",
    "df.apply(lambda x: np.max(x[rows]), axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Group by"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.read_csv('census.csv')\n",
    "df = df[df['SUMLEV']==50]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%timeit -n 10\n",
    "for state in df['STNAME'].unique():\n",
    "    avg = np.average(df.where(df['STNAME']==state).dropna()['CENSUS2010POP'])\n",
    "    print('Counties in state ' + state + ' have an average population of ' + str(avg))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%%timeit -n 10\n",
    "for group, frame in df.groupby('STNAME'):\n",
    "    avg = np.average(frame['CENSUS2010POP'])\n",
    "    print('Counties in state ' + group + ' have an average population of ' + str(avg))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = df.set_index('STNAME')\n",
    "\n",
    "def fun(item):\n",
    "    if item[0]<'M':\n",
    "        return 0\n",
    "    if item[0]<'Q':\n",
    "        return 1\n",
    "    return 2\n",
    "\n",
    "for group, frame in df.groupby(fun):\n",
    "    print('There are ' + str(len(frame)) + ' records in group ' + str(group) + ' for processing.')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('census.csv')\n",
    "df = df[df['SUMLEV']==50]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.groupby('STNAME').agg({'CENSUS2010POP': np.average})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "print(type(df.groupby(level=0)['POPESTIMATE2010','POPESTIMATE2011']))\n",
    "print(type(df.groupby(level=0)['POPESTIMATE2010']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(df.set_index('STNAME').groupby(level=0)['CENSUS2010POP']\n",
    "    .agg({'avg': np.average, 'sum': np.sum}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(df.set_index('STNAME').groupby(level=0)['POPESTIMATE2010','POPESTIMATE2011']\n",
    "    .agg({'avg': np.average, 'sum': np.sum}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(df.set_index('STNAME').groupby(level=0)['POPESTIMATE2010','POPESTIMATE2011']\n",
    "    .agg({'POPESTIMATE2010': np.average, 'POPESTIMATE2011': np.sum}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Scales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.DataFrame(['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D'],\n",
    "                  index=['excellent', 'excellent', 'excellent', 'good', 'good', 'good', 'ok', 'ok', 'ok', 'poor', 'poor'])\n",
    "df.rename(columns={0: 'Grades'}, inplace=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['Grades'].astype('category').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "grades = df['Grades'].astype('category',\n",
    "                             categories=['D', 'D+', 'C-', 'C', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+'],\n",
    "                             ordered=True)\n",
    "grades.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "grades > 'C'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('census.csv')\n",
    "df = df[df['SUMLEV']==50]\n",
    "df = df.set_index('STNAME').groupby(level=0)['CENSUS2010POP'].agg({'avg': np.average})\n",
    "pd.cut(df['avg'],10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pivot Tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#http://open.canada.ca/data/en/dataset/98f1a129-f628-4ce4-b24d-6f16bf24dd64\n",
    "df = pd.read_csv('cars.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.pivot_table(values='(kW)', index='YEAR', columns='Make', aggfunc=np.mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.pivot_table(values='(kW)', index='YEAR', columns='Make', aggfunc=[np.mean,np.min], margins=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Date Functionality in Pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Timestamp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2016-09-01 10:05:00')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Timestamp('9/1/2016 10:05AM')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Period"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2016-01', 'M')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Period('1/2016')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2016-03-05', 'D')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Period('3/5/2016')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### DatetimeIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2016-09-01    a\n",
       "2016-09-02    b\n",
       "2016-09-03    c\n",
       "dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t1 = pd.Series(list('abc'), [pd.Timestamp('2016-09-01'), pd.Timestamp('2016-09-02'), pd.Timestamp('2016-09-03')])\n",
    "t1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.tseries.index.DatetimeIndex"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(t1.index)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### PeriodIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2016-09    d\n",
       "2016-10    e\n",
       "2016-11    f\n",
       "Freq: M, dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t2 = pd.Series(list('def'), [pd.Period('2016-09'), pd.Period('2016-10'), pd.Period('2016-11')])\n",
    "t2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.tseries.period.PeriodIndex"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(t2.index)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Converting to Datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2 June 2013</th>\n",
       "      <td>16</td>\n",
       "      <td>46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Aug 29, 2014</th>\n",
       "      <td>14</td>\n",
       "      <td>66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-06-26</th>\n",
       "      <td>59</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7/12/16</th>\n",
       "      <td>27</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               a   b\n",
       "2 June 2013   16  46\n",
       "Aug 29, 2014  14  66\n",
       "2015-06-26    59  99\n",
       "7/12/16       27  17"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d1 = ['2 June 2013', 'Aug 29, 2014', '2015-06-26', '7/12/16']\n",
    "ts3 = pd.DataFrame(np.random.randint(10, 100, (4,2)), index=d1, columns=list('ab'))\n",
    "ts3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-06-02</th>\n",
       "      <td>16</td>\n",
       "      <td>46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-08-29</th>\n",
       "      <td>14</td>\n",
       "      <td>66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-06-26</th>\n",
       "      <td>59</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-07-12</th>\n",
       "      <td>27</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             a   b\n",
       "2013-06-02  16  46\n",
       "2014-08-29  14  66\n",
       "2015-06-26  59  99\n",
       "2016-07-12  27  17"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts3.index = pd.to_datetime(ts3.index)\n",
    "ts3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2012-07-04 00:00:00')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.to_datetime('4.7.12', dayfirst=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Timedeltas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timedelta('2 days 00:00:00')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Timestamp('9/3/2016')-pd.Timestamp('9/1/2016')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2016-09-14 11:10:00')"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Timestamp('9/2/2016 8:10AM') + pd.Timedelta('12D 3H')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Working with Dates in a Dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2016-10-02', '2016-10-16', '2016-10-30', '2016-11-13',\n",
       "               '2016-11-27', '2016-12-11', '2016-12-25', '2017-01-08',\n",
       "               '2017-01-22'],\n",
       "              dtype='datetime64[ns]', freq='2W-SUN')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates = pd.date_range('10-01-2016', periods=9, freq='2W-SUN')\n",
    "dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Count 1</th>\n",
       "      <th>Count 2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2016-10-02</th>\n",
       "      <td>104</td>\n",
       "      <td>125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-10-16</th>\n",
       "      <td>109</td>\n",
       "      <td>122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-10-30</th>\n",
       "      <td>111</td>\n",
       "      <td>127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-11-13</th>\n",
       "      <td>117</td>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-11-27</th>\n",
       "      <td>114</td>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-12-11</th>\n",
       "      <td>109</td>\n",
       "      <td>121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-12-25</th>\n",
       "      <td>105</td>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-01-08</th>\n",
       "      <td>105</td>\n",
       "      <td>125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-01-22</th>\n",
       "      <td>101</td>\n",
       "      <td>123</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Count 1  Count 2\n",
       "2016-10-02      104      125\n",
       "2016-10-16      109      122\n",
       "2016-10-30      111      127\n",
       "2016-11-13      117      126\n",
       "2016-11-27      114      126\n",
       "2016-12-11      109      121\n",
       "2016-12-25      105      126\n",
       "2017-01-08      105      125\n",
       "2017-01-22      101      123"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({'Count 1': 100 + np.random.randint(-5, 10, 9).cumsum(),\n",
    "                  'Count 2': 120 + np.random.randint(-5, 10, 9)}, index=dates)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Sunday', 'Sunday', 'Sunday', 'Sunday', 'Sunday', 'Sunday',\n",
       "       'Sunday', 'Sunday', 'Sunday'], dtype=object)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.index.weekday_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Count 1</th>\n",
       "      <th>Count 2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2016-10-02</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-10-16</th>\n",
       "      <td>5.0</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-10-30</th>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-11-13</th>\n",
       "      <td>6.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-11-27</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-12-11</th>\n",
       "      <td>-5.0</td>\n",
       "      <td>-5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-12-25</th>\n",
       "      <td>-4.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-01-08</th>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-01-22</th>\n",
       "      <td>-4.0</td>\n",
       "      <td>-2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Count 1  Count 2\n",
       "2016-10-02      NaN      NaN\n",
       "2016-10-16      5.0     -3.0\n",
       "2016-10-30      2.0      5.0\n",
       "2016-11-13      6.0     -1.0\n",
       "2016-11-27     -3.0      0.0\n",
       "2016-12-11     -5.0     -5.0\n",
       "2016-12-25     -4.0      5.0\n",
       "2017-01-08      0.0     -1.0\n",
       "2017-01-22     -4.0     -2.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.diff()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Count 1</th>\n",
       "      <th>Count 2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2016-10-31</th>\n",
       "      <td>108.0</td>\n",
       "      <td>124.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-11-30</th>\n",
       "      <td>115.5</td>\n",
       "      <td>126.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-12-31</th>\n",
       "      <td>107.0</td>\n",
       "      <td>123.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-01-31</th>\n",
       "      <td>103.0</td>\n",
       "      <td>124.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Count 1     Count 2\n",
       "2016-10-31    108.0  124.666667\n",
       "2016-11-30    115.5  126.000000\n",
       "2016-12-31    107.0  123.500000\n",
       "2017-01-31    103.0  124.000000"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.resample('M').mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Count 1</th>\n",
       "      <th>Count 2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2017-01-08</th>\n",
       "      <td>105</td>\n",
       "      <td>125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-01-22</th>\n",
       "      <td>101</td>\n",
       "      <td>123</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Count 1  Count 2\n",
       "2017-01-08      105      125\n",
       "2017-01-22      101      123"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['2017']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Count 1</th>\n",
       "      <th>Count 2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2016-12-11</th>\n",
       "      <td>109</td>\n",
       "      <td>121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-12-25</th>\n",
       "      <td>105</td>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Count 1  Count 2\n",
       "2016-12-11      109      121\n",
       "2016-12-25      105      126"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['2016-12']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Count 1</th>\n",
       "      <th>Count 2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2016-12-11</th>\n",
       "      <td>109</td>\n",
       "      <td>121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016-12-25</th>\n",
       "      <td>105</td>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-01-08</th>\n",
       "      <td>105</td>\n",
       "      <td>125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-01-22</th>\n",
       "      <td>101</td>\n",
       "      <td>123</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Count 1  Count 2\n",
       "2016-12-11      109      121\n",
       "2016-12-25      105      126\n",
       "2017-01-08      105      125\n",
       "2017-01-22      101      123"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['2016-12':]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.asfreq('W', method='ffill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "df.plot()"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Raw Cell Format",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
